#	------------------------------------------------------------------------------
# 	File-Name: 	1_descr_analysis.R
#	  Date:  		20/07/2020
#	  Author: 	Setu Pelz, Johannes Urpelainen
#	  Purpose:   	.R files to replicate descriptive analysis in:
#					Pelz, Setu and Johannes Urpelainen
#					"Measuring and explaining household access to electrical energy 
#         services: Evidence from rural northern India" Energy Policy.
#         DOI: https://doi.org/10.1016/j.enpol.2020.111782
#
#		All survey data come from:
#					Aklin, Michaël; Cheng, Chao-yo; Ganesan, Karthik; Jain, Abhishek; 
#         Urpelainen, Johannes, 2016, "Access to Clean Cooking Energy and 
#         Electricity: Survey of States in India (ACCESS)", 
#         https://doi.org/10.7910/DVN/0NV9LF, Harvard Dataverse, V1,
#         AND
#         Mani, Sunil; Shahidi, Tauseef; Patnaik, Sasmita; Jain, Abhishek; 
#         Tripathi, Saurabh; Ganesan, Karthik; Aklin, Michaël; Urpelainen, 
#         Johannes; Chindarkar, Namrata; Council on Energy, Environment and 
#         Water; Initiative for Sustainable Energy Policy; National University 
#         of Singapore, 2019, "Access to Clean Cooking Energy and Electricity: 
#         Survey of States in India 2018 (ACCESS 2018)", 
#         https://doi.org/10.7910/DVN/AHFINM, Harvard Dataverse, V1,
#	------------------------------------------------------------------------------

# Load Necessary Packages ------------------------------------------------------

#install.packages("pacman")
library(pacman)

### Note, the following lines install packages, double-check if this desired ###

# Tidyverse
p_load(dplyr, forcats, ggplot2, srvyr, tidyr, naniar, patchwork, stringr)

# Estimation
p_load(fixest, margins, etable)

# Misc
p_load(here, readstata13, survey, patchwork, corrplot, stargazer, readxl, dataMaid)

# Custom function
source(here::here("R", "custom_functions.R"))

# Read in ACCESS panel dataset -------------------------------------------------
source(here("R","load_data.R"))

# Set up survey design for srvyr package----------------------------------------

svydata <- pool %>%
  as_survey_design(id=hh, strata=village, weight = weights, nest = TRUE)

# Setting up panel dataset for later analysis ----------------------------------

levels_india <- c("Lighting", "ICTs", "Space Cooling", "Entertainment", 
                  "Thermal Loads", "Mechanical Loads", "Refrigeration", "Cooking")

# Two-Dimensional Framework (Example) ------------------------------------------

a <- data.frame(year = c("Year x", "Year x + n"),
           accdur = c(0.25, 0.6),
           srvtot = c(0.9, 3.5)) %>% 
  ggplot(aes(accdur, srvtot)) +
  geom_path(arrow = arrow(length = unit(0.1, "inches")), size = 1, alpha = 1,
            show.legend = F) +
  geom_text(aes(x = c(0.12, 0.78), label = year)) +
  scale_x_continuous(breaks = seq(0,1,0.25), limits = c(0,1),
                     labels = scales::percent_format(accuracy = 1)) +
  scale_y_continuous(breaks = seq(0,8,1), limits = c(0,8)) +
  labs(x = "Supply-Access Index", 
       y = "Mean Distinct Energy Services Utilised") +
  theme_bw() +
  theme(strip.background = element_rect(fill = NA, color = "black"),
        plot.caption = element_text(hjust = 1))

b <- data.frame(year = c("Year x", "Year x + n"),
                acc = c(0.6, 0.8),
                dur = c(10, 18)) %>% 
  ggplot(aes(acc, dur)) +
  geom_path(arrow = arrow(length = unit(0.1, "inches")), size = 1, alpha = 1,
            show.legend = F) +
  geom_text(aes(x = c(0.45, 0.55), label = year)) +
  scale_x_continuous(breaks = seq(0,1,0.25), limits = c(0,1),
                     labels = scales::percent_format(accuracy = 1)) +
  scale_y_continuous(breaks = seq(0,24,4), limits = c(0,24)) +
  labs(x = "Access Rate", 
       y = "Mean Supply Availability") +
  theme_bw() +
  theme(strip.background = element_rect(fill = NA, color = "black"),
        plot.caption = element_text(hjust = 1))

a + b

ggsave(here("Manuscript","Figures","2_twodim_example.png"),
       device = "png", width = 6, height = 3, units = "in")

# Two-Dimensional Framework (State) --------------------------------------------

meansrvtot_state <- svydata %>%
  group_by(state, wave) %>%
  summarise(meansrvtot = survey_mean(srv_tot, na.rm = T)) %>% 
  select(-c(meansrvtot_se))

dur_state <- svydata %>% 
  filter(duration_day > 0) %>% 
  group_by(state, wave) %>%
  summarise(meanduration = survey_mean(duration_day, na.rm = T))

acc_state <- svydata %>% 
  group_by(state, wave) %>%
  summarise(meanaccess = survey_mean(useelec == 1))

duracc_state <- merge(dur_state,acc_state) %>% 
  select(-c(meanduration_se, meanaccess_se))

srvduracc_state <- merge(meansrvtot_state, duracc_state)

srvduracc_state <- srvduracc_state %>% 
  mutate(accessindex = meanduration/24 * meanaccess)

srvduracc_state_summary <- srvduracc_state %>% 
  transmute(State = str_to_title(state), 
            Wave = ifelse(wave == 1, "2018", "2015"), 
            "Mean Services" = round(meansrvtot,1),
            "Supply/Access Index" = scales::percent(accessindex, accuracy = 1),
            "Mean Availability" = round(meanduration,1),
            "Access Rate" = scales::percent(meanaccess, accuracy = 1))

stargazer(srvduracc_state_summary %>% filter(Wave == 2015), summary = F, float = F, digits = 1, rownames = F,
          out = here("Manuscript", "Tables", "twodimdescresults_2015.tex"))

stargazer(srvduracc_state_summary %>% filter(Wave == 2018), summary = F, float = F, digits = 1, rownames = F,
          out = here("Manuscript", "Tables", "twodimdescresults_2018.tex"))

p1 <- srvduracc_state %>% 
  ggplot(aes(accessindex, meansrvtot, group = state, colour = state)) +
  geom_line(colour = "grey50") +
  geom_point(aes(shape = state), size = 3,
             data = srvduracc_state %>% filter(wave == 1)) +
  scale_x_continuous(limits = c(0,1), breaks = seq(0,1,0.2),
                     labels = scales::percent) +
  scale_y_continuous(limits = c(0,5), breaks = seq(0,5,1)) +
  labs(x = "Supply-Access Index",
       y = "Mean Distinct Energy Services Utilised",
       colour = "State",
       shape = "State") +
  theme_bw() +
  theme(strip.background = element_rect(fill = NA, color = "black"),
        plot.caption = element_text(hjust = 1))

p2 <- srvduracc_state %>% 
  ggplot(aes(meanaccess, meanduration, group = state, colour = state)) +
  geom_line(colour = "grey50") +
  geom_point(aes(shape = state),  size = 3,
             data = srvduracc_state %>% filter(wave == 1)) +
  scale_y_continuous(limits = c(0,24), breaks = seq(0,24,4)) +
  scale_x_continuous(labels = scales::percent, limits = c(0,1)) +
  labs(y = "Mean Supply Availability",
       x = "Access Rate",
       colour = "State",
       shape = "State") +
  theme_bw() +
  theme(strip.background = element_rect(fill = NA, color = "black"),
        plot.caption = element_text(hjust = 1))

p1 + p2 + plot_layout(guides = "collect") & theme(legend.position = "top")

ggsave(here("Manuscript","Figures","3_twodim_state.png"),
       device = "png", width = 8, height = 4.5, units = "in")

# Disaggregate Analysis --------------------------------------------------------

meanservices_state <- svydata %>%
  group_by(state, wave) %>%
  summarise(meansrvlight = survey_mean(srv_light, na.rm = T),
            meansrvict = survey_mean(srv_ict, na.rm = T),
            meansrvent = survey_mean(srv_ent, na.rm = T),
            meansrvspch = survey_mean(srv_spch, na.rm = T),
            meansrvfrdg = survey_mean(srv_frdg, na.rm = T),
            meansrvmech = survey_mean(srv_mech, na.rm = T),
            meansrvthrm = survey_mean(srv_thrm, na.rm = T),
            meansrvcook = survey_mean(srv_cook, na.rm = T)) %>% 
  select(-c(meansrvlight_se, meansrvict_se, meansrvent_se, 
            meansrvspch_se, meansrvfrdg_se, meansrvmech_se, meansrvthrm_se, 
            meansrvcook_se)) %>% 
  gather(attribute, value, -c(state, wave)) %>% 
  mutate(wave = ifelse(wave == 0, "2015", "2018"))

meanduration_state <- svydata %>% 
  group_by(state, wave) %>% 
  summarise(
    "20 hours" = survey_mean(duration_day >= 20, na.rm = T),
    "16 hours" = survey_mean(duration_day >= 16 & duration_day < 20, na.rm = T),
    "12 hours" = survey_mean(duration_day >= 12 & duration_day < 16, na.rm = T),
    "8 hours" = survey_mean(duration_day >= 8 & duration_day < 12, na.rm = T),
    "4 hours" = survey_mean(duration_day >= 4 & duration_day < 8, na.rm = T),
    "0 hours" = survey_mean(duration_day < 4 | is.na(duration_day), na.rm = T)
  ) %>% 
  select(-matches("se")
         ) %>% 
  mutate(wave = ifelse(wave == 0, "2015", "2018"))

meandurstate_plot <- meanduration_state %>% 
  gather(hours, prop, -c(state, wave)) %>% 
  mutate(hours = factor(hours, levels = c("0 hours", "4 hours", "8 hours", 
                                        "12 hours", "16 hours", "20 hours"))
         ) %>% 
  ggplot(aes(hours, prop, fill = wave, label = scales::percent(prop, accuracy = 1))) +
  geom_col(position = position_dodge(width = 0.6), width = 0.6) +
  geom_text(position = position_dodge(width = 0.6), hjust = -0.1) +
  scale_y_continuous(breaks = seq(0,1,0.25), limits = c(0,1),
                     labels = scales::percent) +
  coord_flip() +
  facet_wrap(~state) +
  labs(x = NULL,
       y = "% Population (Weighted)",
       subtitle = "Minimum Supply Availability",
       fill = NULL) +
  theme_bw() +
  theme(strip.background = element_rect(fill = NA, color = "black"),
        plot.caption = element_text(hjust = 1),
        axis.ticks.x = element_blank()) +
  guides(fill = guide_legend(reverse = TRUE))

meansrvstate_plot <- meanservices_state %>% 
  mutate(attribute = case_when(
           attribute == "meansrvlight" ~ "Lighting",
           attribute == "meansrvict" ~ "ICTs",
           attribute == "meansrvent" ~ "Entertainment",
           attribute == "meansrvspch" ~ "Space Cooling",
           attribute == "meansrvfrdg" ~ "Refrigeration",
           attribute == "meansrvmech" ~ "Mechanical Loads",
           attribute == "meansrvthrm" ~ "Thermal Loads",
           attribute == "meansrvcook" ~ "Cooking"),
         attribute = fct_rev(factor(attribute, levels = levels_india))
  ) %>% 
  ggplot(aes(attribute, value, fill = wave, label = scales::percent(value, accuracy = 1))) +
  geom_col(position = position_dodge(width = 0.6), width = 0.6) +
  geom_text(position = position_dodge(width = 0.6), hjust = -0.1) +
  coord_flip() +
  facet_wrap(~state) +
  scale_y_continuous(limits = c(0,1.05), breaks = seq(0,1,0.2),
                     labels = scales::percent_format(accuracy = 1L)) +
  labs(x = NULL,
       y = "% Population (Weighted)",
       subtitle = "Utilisation Rates",
       fill = NULL) +
  theme_bw() +
  theme(strip.background = element_rect(fill = NA, color = "black"),
        plot.caption = element_text(hjust = 1),
        axis.ticks.x = element_blank()) +
  guides(fill = guide_legend(reverse = TRUE))

meansrvstate_plot + meandurstate_plot + plot_layout(guides = "collect") &
  theme(legend.position = "bottom") & guides(fill = guide_legend(reverse = F))

ggsave(here("Manuscript","Figures","5_services_state.png"),
       device = "png", width = 14, height = 10, units = "in")
